/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2011 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */

package javax.mail.internet;

import javax.mail.MessagingException;
import javax.activation.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;

import com.sun.mail.util.*;

/**
 * This is a utility class that provides various MIME related
 * functionality.
 * <p>
 * 
 * There are a set of methods to encode and decode MIME headers as per RFC 2047.
 * Note that, in general, these methods are <strong>not</strong> needed when
 * using methods such as <code>setSubject</code> and <code>setRecipients</code>;
 * JavaMail will automatically encode and decode data when using these "higher
 * level" methods. The methods below are only needed when manipulating raw MIME
 * headers using <code>setHeader</code> and <code>getHeader</code> methods. A
 * brief description on handling such headers is given below:
 * <p>
 * 
 * RFC 822 mail headers <strong>must</strong> contain only US-ASCII characters.
 * Headers that contain non US-ASCII characters must be encoded so that they
 * contain only US-ASCII characters. Basically, this process involves using
 * either BASE64 or QP to encode certain characters. RFC 2047 describes this in
 * detail.
 * <p>
 * 
 * In Java, Strings contain (16 bit) Unicode characters. ASCII is a subset of
 * Unicode (and occupies the range 0 - 127). A String that contains only ASCII
 * characters is already mail-safe. If the String contains non US-ASCII
 * characters, it must be encoded. An additional complexity in this step is that
 * since Unicode is not yet a widely used charset, one might want to first
 * charset-encode the String into another charset and then do the
 * transfer-encoding.
 * <p>
 * Note that to get the actual bytes of a mail-safe String (say, for sending
 * over SMTP), one must do
 * <p>
 * <blockquote>
 * 
 * <pre>
 * 
 * byte[] bytes = string.getBytes(&quot;iso-8859-1&quot;);
 * 
 * </pre>
 * 
 * </blockquote>
 * <p>
 * 
 * The <code>setHeader</code> and <code>addHeader</code> methods on MimeMessage
 * and MimeBodyPart assume that the given header values are Unicode strings that
 * contain only US-ASCII characters. Hence the callers of those methods must
 * ensure that the values they pass do not contain non US-ASCII characters. The
 * methods in this class help do this.
 * <p>
 * 
 * The <code>getHeader</code> family of methods on MimeMessage and MimeBodyPart
 * return the raw header value. These might be encoded as per RFC 2047, and if
 * so, must be decoded into Unicode Strings. The methods in this class help to
 * do this.
 * <p>
 * 
 * Several System properties control strict conformance to the MIME spec. Note
 * that these are not session properties but must be set globally as System
 * properties.
 * <p>
 * 
 * The <code>mail.mime.decodetext.strict</code> property controls decoding of
 * MIME encoded words. The MIME spec requires that encoded words start at the
 * beginning of a whitespace separated word. Some mailers incorrectly include
 * encoded words in the middle of a word. If the
 * <code>mail.mime.decodetext.strict</code> System property is set to
 * <code>"false"</code>, an attempt will be made to decode these illegal encoded
 * words. The default is true.
 * <p>
 * 
 * The <code>mail.mime.encodeeol.strict</code> property controls the choice of
 * Content-Transfer-Encoding for MIME parts that are not of type "text". Often
 * such parts will contain textual data for which an encoding that allows normal
 * end of line conventions is appropriate. In rare cases, such a part will
 * appear to contain entirely textual data, but will require an encoding that
 * preserves CR and LF characters without change. If the
 * <code>mail.mime.encodeeol.strict</code> System property is set to
 * <code>"true"</code>, such an encoding will be used when necessary. The
 * default is false.
 * <p>
 * 
 * In addition, the <code>mail.mime.charset</code> System property can be used
 * to specify the default MIME charset to use for encoded words and text parts
 * that don't otherwise specify a charset. Normally, the default MIME charset is
 * derived from the default Java charset, as specified in the
 * <code>file.encoding</code> System property. Most applications will have no
 * need to explicitly set the default MIME charset. In cases where the default
 * MIME charset to be used for mail messages is different than the charset used
 * for files stored on the system, this property should be set.
 * <p>
 * 
 * The current implementation also supports the following System property.
 * <p>
 * The <code>mail.mime.ignoreunknownencoding</code> property controls whether
 * unknown values in the <code>Content-Transfer-Encoding</code> header, as
 * passed to the <code>decode</code> method, cause an exception. If set to
 * <code>"true"</code>, unknown values are ignored and 8bit encoding is assumed.
 * Otherwise, unknown values cause a MessagingException to be thrown.
 * 
 * @author John Mani
 * @author Bill Shannon
 */

public class MimeUtility {

	// This class cannot be instantiated: the only constructor is private and
	// does nothing, so the class is used solely through its static members.
	private MimeUtility() {
	}

	// Passed as the second argument of checkAscii() by
	// getEncoding(DataSource). Presumably means "examine the whole stream"
	// -- TODO confirm against checkAscii().
	public static final int ALL = -1;

	// cached map of whether a charset is compatible with ASCII
	// Map<String,Boolean>: key is the lower-cased charset name, value is TRUE
	// when the charset is NOT ASCII-compatible (see nonAsciiCharset()).
	// Readers and writers synchronize on the map itself.
	private static final Map nonAsciiCharsetMap = new HashMap();

	// System property "mail.mime.decodetext.strict", default true. When false,
	// decoding also attempts to decode encoded words found inside a token.
	private static final boolean decodeStrict = PropUtil.getBooleanSystemProperty("mail.mime.decodetext.strict", true);
	// System property "mail.mime.encodeeol.strict", default false. Handed to
	// AsciiOutputStream when getEncoding(DataHandler) examines non-text content.
	private static final boolean encodeEolStrict = PropUtil.getBooleanSystemProperty("mail.mime.encodeeol.strict", false);
	// System property "mail.mime.ignoreunknownencoding", default false. When
	// true, decode() returns the stream unchanged for an unknown encoding
	// instead of throwing.
	private static final boolean ignoreUnknownEncoding = PropUtil.getBooleanSystemProperty("mail.mime.ignoreunknownencoding", false);
	/*
	 * The following two properties allow disabling the fold()
	 * and unfold() methods and reverting to the previous behavior.
	 * They should never need to be changed and are here only because
	 * of my paranoid concern with compatibility.
	 */
	// System property "mail.mime.foldencodedwords", default false. When true,
	// doEncode() separates consecutive encoded words with CRLF + SPACE instead
	// of a single SPACE.
	private static final boolean foldEncodedWords = PropUtil.getBooleanSystemProperty("mail.mime.foldencodedwords", false);
	// System property "mail.mime.foldtext", default true.
	private static final boolean foldText = PropUtil.getBooleanSystemProperty("mail.mime.foldtext", true);

	/**
	 * Get the content-transfer-encoding that should be applied
	 * to the input stream of this datasource, to make it mailsafe.
	 * <p>
	 * 
	 * The algorithm used here is: <br>
	 * <ul>
	 * <li>
	 * If the primary type of this datasource is "text" and if all the bytes in
	 * its input stream are US-ASCII, then the encoding is "7bit". If more than
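	 * half of the bytes are non-US-ASCII, then the encoding is "base64". If
	 * less than half of the bytes are non-US-ASCII, then the encoding is
	 * "quoted-printable", unless the charset parameter of the content type
	 * names a charset that is not compatible with ASCII (for example
	 * utf-16be), in which case the encoding is "base64".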
	 * <li>
	 * If the primary type of this datasource is not "text", then if all the
	 * bytes of its input stream are US-ASCII, the encoding is "7bit". If there
	 * is even one non-US-ASCII character, the encoding is "base64".
	 * </ul>
	 * 
	 * @param ds
	 *            DataSource
	 * @return the encoding. This is either "7bit",
	 *         "quoted-printable" or "base64"
	 */
	public static String getEncoding(DataSource ds) {
		// Declared outside the try block so the finally clause can close it.
		// It stays null if anything fails before getInputStream() returns.
		InputStream is = null;

		try {
			ContentType cType = new ContentType(ds.getContentType());
			is = ds.getInputStream();

			boolean isText = cType.match("text/*");
			// if not text, stop processing when we see non-ASCII
			switch (checkAscii(is, ALL, !isText)) {
			case ALL_ASCII:
				return "7bit"; // all ASCII
			case MOSTLY_ASCII:
				// A text part whose declared charset is not ASCII-compatible
				// gets base64 rather than quoted-printable.
				if (isText && nonAsciiCharset(cType))
					return "base64"; // charset isn't compatible with ASCII
				return "quoted-printable"; // mostly ASCII
			default:
				return "base64"; // mostly binary
			}
		} catch (Exception ex) {
			// Any failure (bad content type, unreadable data, ...) falls
			// back to the encoding that is safe for arbitrary bytes.
			return "base64"; // what else ?!
		} finally {
			// Guard against a null stream: closing unconditionally would
			// throw a NullPointerException from this finally clause and
			// replace the "base64" fallback returned above.
			if (is != null) {
				try {
					is.close();
				} catch (IOException ioex) {
					// best effort; the encoding has already been decided
				}
			}
		}
	}

	/**
	 * Determine whether the charset in the Content-Type is compatible
	 * with ASCII or not. A charset is compatible with ASCII if the
	 * encoded byte stream representing the Unicode string "\r\n" is
	 * the ASCII characters CR and LF. For example, the utf-16be
	 * charset is not compatible with ASCII.
	 * 
	 * For performance, we keep a static map that caches the results.
	 */
	private static boolean nonAsciiCharset(ContentType ct) {
		String name = ct.getParameter("charset");
		if (name == null)
			return false; // no charset parameter, nothing to check
		String key = name.toLowerCase(Locale.ENGLISH);

		// Serve the answer from the cache when this charset was seen before.
		Boolean cached;
		synchronized (nonAsciiCharsetMap) {
			cached = (Boolean) nonAsciiCharsetMap.get(key);
		}
		if (cached != null)
			return cached.booleanValue();

		// Not cached yet: encode CRLF with the charset and see whether the
		// result is exactly the two ASCII bytes CR (13) and LF (10).
		Boolean verdict;
		try {
			byte[] crlf = "\r\n".getBytes(key);
			boolean asciiCrLf = crlf != null && crlf.length == 2 && crlf[0] == '\r' && crlf[1] == '\n';
			verdict = asciiCrLf ? Boolean.FALSE : Boolean.TRUE;
		} catch (UnsupportedEncodingException uex) {
			verdict = Boolean.FALSE; // a guess
		} catch (RuntimeException ex) {
			verdict = Boolean.TRUE; // one of the weird ones?
		}

		// Remember the result for the next lookup of the same charset.
		synchronized (nonAsciiCharsetMap) {
			nonAsciiCharsetMap.put(key, verdict);
		}
		return verdict.booleanValue();
	}

	/**
	 * Same as <code>getEncoding(DataSource)</code> except that instead
	 * of reading the data from an <code>InputStream</code> it uses the
	 * <code>writeTo</code> method to examine the data. This is more
	 * efficient in the common case of a <code>DataHandler</code> created with
	 * an object and a MIME type (for example, a
	 * "text/plain" String) because all the I/O is done in this
	 * thread. In the case requiring an <code>InputStream</code> the
	 * <code>DataHandler</code> uses a thread, a pair of pipe streams,
	 * and the <code>writeTo</code> method to produce the data.
	 * <p>
	 * 
	 * @since JavaMail 1.2
	 */
	public static String getEncoding(DataHandler dh) {
		/*
		 * Pick the cheaper way of examining the data. A DataHandler
		 * created with an object always has a null name, while one
		 * created with a DataSource usually has a non-null name; in
		 * the latter case getEncoding(DataSource) is typically faster.
		 *
		 * XXX - This is actually quite a disgusting hack, but it makes
		 * a common case run over twice as fast.
		 */
		if (dh.getName() != null)
			return getEncoding(dh.getDataSource());

		ContentType cType;
		try {
			cType = new ContentType(dh.getContentType());
		} catch (Exception ex) {
			return "base64"; // what else ?!
		}

		// Text content is examined in full. Anything else stops at the
		// first non-US-ASCII character and honours encodeEolStrict.
		boolean isText = cType.match("text/*");
		AsciiOutputStream aos;
		if (isText)
			aos = new AsciiOutputStream(false, false);
		else
			aos = new AsciiOutputStream(true, encodeEolStrict);

		try {
			dh.writeTo(aos);
		} catch (IOException ex) {
			// ignore it
		}

		int ascii = aos.getAscii();
		if (ascii == ALL_ASCII)
			return "7bit"; // all ascii
		if (isText && ascii == MOSTLY_ASCII)
			return "quoted-printable"; // mostly ascii
		// text that is mostly binary, or non-text with at least one
		// non-ascii character
		return "base64";
	}

	/**
	 * Decode the given input stream. The Input stream returned is
	 * the decoded input stream. All the encodings defined in RFC 2045
	 * are supported here. They include "base64", "quoted-printable",
	 * "7bit", "8bit", and "binary". In addition, "uuencode" is also
	 * supported.
	 * <p>
	 * 
	 * In the current implementation, if the
	 * <code>mail.mime.ignoreunknownencoding</code> system property is set to
	 * <code>"true"</code>, unknown encoding values are ignored and the original
	 * InputStream is returned.
	 * 
	 * @param is
	 *            input stream
	 * @param encoding
	 *            the encoding of the stream.
	 * @return decoded input stream.
	 * @exception MessagingException
	 *                if the encoding is unknown
	 */
	public static InputStream decode(InputStream is, String encoding) throws MessagingException {
		// Encodings that need a decoder wrapped around the stream.
		if (encoding.equalsIgnoreCase("base64"))
			return new BASE64DecoderStream(is);
		if (encoding.equalsIgnoreCase("quoted-printable"))
			return new QPDecoderStream(is);

		boolean uuencoded = encoding.equalsIgnoreCase("uuencode") || encoding.equalsIgnoreCase("x-uuencode") || encoding.equalsIgnoreCase("x-uue");
		if (uuencoded)
			return new UUDecoderStream(is);

		// Identity encodings: the stream is handed back as it is.
		boolean identity = encoding.equalsIgnoreCase("binary") || encoding.equalsIgnoreCase("7bit") || encoding.equalsIgnoreCase("8bit");

		// An unrecognised name is an error unless the
		// mail.mime.ignoreunknownencoding System property asks for it to be
		// tolerated, in which case the stream is also handed back as it is.
		if (!identity && !ignoreUnknownEncoding)
			throw new MessagingException("Unknown encoding: " + encoding);
		return is;
	}

	/**
	 * Wrap an encoder around the given output stream.
	 * All the encodings defined in RFC 2045 are supported here.
	 * They include "base64", "quoted-printable", "7bit", "8bit" and
	 * "binary". In addition, "uuencode" is also supported.
	 * 
	 * @param os
	 *            output stream
	 * @param encoding
	 *            the encoding of the stream.
	 * @return output stream that applies the
	 *         specified encoding.
	 * @exception MessagingException
	 *                if the encoding is unknown
	 */
	public static OutputStream encode(OutputStream os, String encoding) throws MessagingException {
		// No encoding requested: hand the stream back untouched.
		if (encoding == null)
			return os;

		// Encodings that need an encoder wrapped around the stream.
		if (encoding.equalsIgnoreCase("base64"))
			return new BASE64EncoderStream(os);
		if (encoding.equalsIgnoreCase("quoted-printable"))
			return new QPEncoderStream(os);

		boolean uuencoded = encoding.equalsIgnoreCase("uuencode") || encoding.equalsIgnoreCase("x-uuencode") || encoding.equalsIgnoreCase("x-uue");
		if (uuencoded)
			return new UUEncoderStream(os);

		// Identity encodings need no wrapper; anything else is unknown.
		boolean identity = encoding.equalsIgnoreCase("binary") || encoding.equalsIgnoreCase("7bit") || encoding.equalsIgnoreCase("8bit");
		if (!identity)
			throw new MessagingException("Unknown encoding: " + encoding);
		return os;
	}

	/**
	 * Wrap an encoder around the given output stream.
	 * All the encodings defined in RFC 2045 are supported here.
	 * They include "base64", "quoted-printable", "7bit", "8bit" and
	 * "binary". In addition, "uuencode" is also supported.
	 * The <code>filename</code> parameter is used with the "uuencode"
	 * encoding and is included in the encoded output.
	 * 
	 * @param os
	 *            output stream
	 * @param encoding
	 *            the encoding of the stream.
	 * @param filename
	 *            name for the file being encoded (only used
	 *            with uuencode)
	 * @return output stream that applies the
	 *         specified encoding.
	 * @since JavaMail 1.2
	 */
	public static OutputStream encode(OutputStream os, String encoding, String filename) throws MessagingException {
		// No encoding requested: hand the stream back untouched.
		if (encoding == null)
			return os;

		// Encodings that need an encoder wrapped around the stream.
		if (encoding.equalsIgnoreCase("base64"))
			return new BASE64EncoderStream(os);
		if (encoding.equalsIgnoreCase("quoted-printable"))
			return new QPEncoderStream(os);

		// uuencode is the only encoding that makes use of the file name.
		boolean uuencoded = encoding.equalsIgnoreCase("uuencode") || encoding.equalsIgnoreCase("x-uuencode") || encoding.equalsIgnoreCase("x-uue");
		if (uuencoded)
			return new UUEncoderStream(os, filename);

		// Identity encodings need no wrapper; anything else is unknown.
		boolean identity = encoding.equalsIgnoreCase("binary") || encoding.equalsIgnoreCase("7bit") || encoding.equalsIgnoreCase("8bit");
		if (!identity)
			throw new MessagingException("Unknown encoding: " + encoding);
		return os;
	}

	/**
	 * Encode a RFC 822 "text" token into mail-safe form as per
	 * RFC 2047.
	 * <p>
	 * 
	 * The given Unicode string is examined for non US-ASCII characters. If the
	 * string contains only US-ASCII characters, it is returned as-is. If the
	 * string contains non US-ASCII characters, it is first character-encoded
	 * using the platform's default charset, then transfer-encoded using either
	 * the B or Q encoding. The resulting bytes are then returned as a Unicode
	 * string containing only ASCII characters.
	 * <p>
	 * 
	 * Note that this method should be used to encode only "unstructured" RFC
	 * 822 headers.
	 * <p>
	 * 
	 * Example of usage:
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * 
	 *  MimePart part = ...
	 *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
	 *  try {
	 *    // If we know for sure that rawvalue contains only US-ASCII 
	 *    // characters, we can skip the encoding part
	 *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
	 *  } catch (UnsupportedEncodingException e) {
	 *    // encoding failure
	 *  } catch (MessagingException me) {
	 *   // setHeader() failure
	 *  }
	 * 
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @param text
	 *            Unicode string
	 * @return Unicode string containing only US-ASCII characters
	 * @exception UnsupportedEncodingException
	 *                if the encoding fails
	 */
	public static String encodeText(String text) throws UnsupportedEncodingException {
		// Delegate with no explicit charset and no explicit transfer
		// encoding; the three-argument overload picks both.
		return encodeText(text, null, null);
	}

	/**
	 * Encode a RFC 822 "text" token into mail-safe form as per
	 * RFC 2047.
	 * <p>
	 * 
	 * The given Unicode string is examined for non US-ASCII characters. If the
	 * string contains only US-ASCII characters, it is returned as-is. If the
	 * string contains non US-ASCII characters, it is first character-encoded
	 * using the specified charset, then transfer-encoded using either the B or
	 * Q encoding. The resulting bytes are then returned as a Unicode string
	 * containing only ASCII characters.
	 * <p>
	 * 
	 * Note that this method should be used to encode only "unstructured" RFC
	 * 822 headers.
	 * 
	 * @param text
	 *            the header value
	 * @param charset
	 *            the charset. If this parameter is null, the
	 *            platform's default charset is used.
	 * @param encoding
	 *            the encoding to be used. Currently supported
	 *            values are "B" and "Q". If this parameter is null, then
	 *            the "Q" encoding is used if most of characters to be
	 *            encoded are in the ASCII charset, otherwise "B" encoding
	 *            is used.
	 * @return Unicode string containing only US-ASCII characters
	 */
	public static String encodeText(String text, String charset, String encoding) throws UnsupportedEncodingException {
		// The trailing 'false' tells the shared encoder that an RFC 822
		// "text" token, not a "word" token, is being encoded.
		return encodeWord(text, charset, encoding, false);
	}

	/**
	 * Decode "unstructured" headers, that is, headers that are defined
	 * as '*text' as per RFC 822.
	 * <p>
	 * 
	 * The string is decoded using the algorithm specified in RFC 2047, Section
	 * 6.1. If the charset-conversion fails for any sequence, an
	 * UnsupportedEncodingException is thrown. If the String is not an RFC 2047
	 * style encoded header, it is returned as-is
	 * <p>
	 * 
	 * Example of usage:
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * 
	 *  MimePart part = ...
	 *  String rawvalue = null;
	 *  String  value = null;
	 *  try {
	 *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
	 *      value = MimeUtility.decodeText(rawvalue);
	 *  } catch (UnsupportedEncodingException e) {
	 *      // Don't care
	 *      value = rawvalue;
	 *  } catch (MessagingException me) { }
	 * 
	 *  return value;
	 * 
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @param etext
	 *            the possibly encoded value
	 * @exception UnsupportedEncodingException
	 *                if the charset
	 *                conversion failed.
	 */
	public static String decodeContent(String etext) throws UnsupportedEncodingException {
		/*
		 * Quick check first: if the sequence "=?" does not occur at all
		 * there are no encoded-words in here, so the string is returned
		 * as-is. This handles the most common case of unencoded headers
		 * efficiently.
		 */
		if (etext.indexOf("=?") == -1)
			return etext;

		/*
		 * Sequences are separated by "linear-white-space"
		 * (RFC 2047, Section 6.1), which RFC 822 defines as
		 * SPACE | HT | CR | NL.
		 */
		final String whitespace = " \t\n\r";

		// The tokenizer also hands back the delimiters themselves.
		StringTokenizer tokens = new StringTokenizer(etext, whitespace, true);
		StringBuffer result = new StringBuffer(); // decoded output
		StringBuffer pendingSpace = new StringBuffer(); // whitespace not yet emitted
		boolean prevWasEncoded = false;

		while (tokens.hasMoreTokens()) {
			String token = tokens.nextToken();
			char first = token.charAt(0);

			// Whitespace is only collected here; whether it is kept is
			// decided once the next word is known.
			if (whitespace.indexOf(first) >= 0) {
				pendingSpace.append(first);
				continue;
			}

			String decoded; // what gets appended for this token
			boolean dropWhitespace; // discard the collected whitespace?
			boolean endsEncoded; // value of prevWasEncoded for the next token

			try {
				decoded = decodeWord(token);
				// The token is an 'encoded-word'. Whitespace between two
				// adjacent encoded-words is ignored.
				dropWhitespace = prevWasEncoded;
				endsEncoded = true;
			} catch (ParseException pex) {
				// The token is NOT an 'encoded-word'.
				decoded = token;
				dropWhitespace = false;
				endsEncoded = false;

				if (!decodeStrict) {
					// possibly decode inner encoded words
					String inner = decodeInnerWords(token);
					// A different String object coming back means that
					// decoding was done (identity comparison on purpose).
					if (inner != token) {
						// encoded followed by encoded: throw away the
						// whitespace between them
						dropWhitespace = prevWasEncoded && token.startsWith("=?");
						// did the original end with encoded?
						endsEncoded = token.endsWith("?=");
						decoded = inner;
					}
				}
			}

			if (!dropWhitespace && pendingSpace.length() > 0)
				result.append(pendingSpace);
			prevWasEncoded = endsEncoded;

			result.append(decoded); // append the actual word
			pendingSpace.setLength(0); // start collecting afresh
		}

		result.append(pendingSpace); // append trailing whitespace
		return result.toString();
	}

	/**
	 * Encode a RFC 822 "word" token into mail-safe form as per
	 * RFC 2047.
	 * <p>
	 * 
	 * The given Unicode string is examined for non US-ASCII characters. If the
	 * string contains only US-ASCII characters, it is returned as-is. If the
	 * string contains non US-ASCII characters, it is first character-encoded
	 * using the platform's default charset, then transfer-encoded using either
	 * the B or Q encoding. The resulting bytes are then returned as a Unicode
	 * string containing only ASCII characters.
	 * <p>
	 * 
	 * This method is meant to be used when creating RFC 822 "phrases". The
	 * InternetAddress class, for example, uses this to encode its 'phrase'
	 * component.
	 * 
	 * @param word
	 *            Unicode string
	 * @return Unicode string containing only US-ASCII
	 *         characters.
	 * @exception UnsupportedEncodingException
	 *                if the encoding fails
	 */
	public static String encodeWord(String word) throws UnsupportedEncodingException {
		// Delegate with no explicit charset and no explicit transfer
		// encoding; the three-argument overload picks both.
		return encodeWord(word, null, null);
	}

	/**
	 * Encode a RFC 822 "word" token into mail-safe form as per
	 * RFC 2047.
	 * <p>
	 * 
	 * The given Unicode string is examined for non US-ASCII characters. If the
	 * string contains only US-ASCII characters, it is returned as-is. If the
	 * string contains non US-ASCII characters, it is first character-encoded
	 * using the specified charset, then transfer-encoded using either the B or
	 * Q encoding. The resulting bytes are then returned as a Unicode string
	 * containing only ASCII characters.
	 * <p>
	 * 
	 * @param word
	 *            Unicode string
	 * @param charset
	 *            the MIME charset
	 * @param encoding
	 *            the encoding to be used. Currently supported
	 *            values are "B" and "Q". If this parameter is null, then
	 *            the "Q" encoding is used if most of characters to be
	 *            encoded are in the ASCII charset, otherwise "B" encoding
	 *            is used.
	 * @return Unicode string containing only US-ASCII characters
	 * @exception UnsupportedEncodingException
	 *                if the encoding fails
	 */
	public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
		// The trailing 'true' tells the shared encoder that an RFC 822
		// "word" token is being encoded.
		return encodeWord(word, charset, encoding, true);
	}

	/*
	 * Encode the given string. The parameter 'encodingWord' should
	 * be true if a RFC 822 "word" token is being encoded and false if a
	 * RFC 822 "text" token is being encoded. This is because the
	 * "Q" encoding defined in RFC 2047 has more restrictions when
	 * encoding "word" tokens. (Sigh)
	 */
	private static String encodeWord(String string, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {

		// A string made up of US-ASCII characters only needs no encoding.
		int asciiKind = checkAscii(string);
		if (asciiKind == ALL_ASCII)
			return string;

		// Work out both the Java name and the MIME name of the charset.
		String javaName;
		if (charset != null) {
			// MIME charset -> java charset
			javaName = javaCharset(charset);
		} else {
			// nothing specified, fall back to the defaults
			javaName = getDefaultJavaCharset(); // the java charset
			charset = getDefaultMIMECharset(); // the MIME equivalent
		}

		// If no transfer-encoding is specified, figure one out.
		if (encoding == null)
			encoding = (asciiKind == MOSTLY_NONASCII) ? "B" : "Q";

		boolean useBase64 = encoding.equalsIgnoreCase("B");
		if (!useBase64 && !encoding.equalsIgnoreCase("Q"))
			throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);

		// As per RFC 2047, size of an encoded string should not
		// exceed 75 bytes.
		// 7 = size of "=?", '?', 'B'/'Q', '?', "?="
		int available = 75 - 7 - charset.length();
		String prefix = "=?" + charset + "?" + encoding + "?";

		StringBuffer encoded = new StringBuffer(); // the output buffer
		doEncode(string, useBase64, javaName, available, prefix, true, encodingWord, encoded);
		return encoded.toString();
	}

	/*
	 * Encode 'string' as one or more encoded-words and append them to 'buf'.
	 * If the encoded form is longer than 'avail', the string is split in two
	 * and each part is encoded recursively as its own encoded-word. 'prefix'
	 * is written in front of every encoded-word and "?=" after it. 'first'
	 * is false for every encoded-word except the first, so that a separator
	 * is written before it.
	 */
	private static void doEncode(String string, boolean b64, String jcharset, int avail, String prefix, boolean first, boolean encodingWord, StringBuffer buf) throws UnsupportedEncodingException {

		// First find out what the length of the encoded version of
		// 'string' would be.
		byte[] bytes = string.getBytes(jcharset);
		int len;
		if (b64) // "B" encoding
			len = BEncoderStream.encodedLength(bytes);
		else
			// "Q" encoding
			len = QEncoderStream.encodedLength(bytes, encodingWord);

		// Choose where to split an over-long string; 0 means "do not split".
		int size = string.length();
		int split = 0;
		if (len > avail && size > 1) {
			split = size / 2;
			char before = string.charAt(split - 1);
			char after = string.charAt(split);
			// Never cut between the two halves of a UTF-16 surrogate pair
			// (high 0xD800-0xDBFF followed by low 0xDC00-0xDFFF): each half
			// would be charset-encoded on its own and the character lost.
			boolean insidePair = before >= 0xD800 && before <= 0xDBFF && after >= 0xDC00 && after <= 0xDFFF;
			if (insidePair)
				split = (split > 1) ? split - 1 : split + 1;
			// A string that is nothing but one surrogate pair cannot be
			// split at all; encode it whole rather than recurse forever.
			if (split >= size)
				split = 0;
		}

		if (split > 0) {
			// Too long: encode the two parts separately. Only the first
			// part can be the first encoded-word of the sequence.
			doEncode(string.substring(0, split), b64, jcharset, avail, prefix, first, encodingWord, buf);
			doEncode(string.substring(split, size), b64, jcharset, avail, prefix, false, encodingWord, buf);
			return;
		}

		// The string fits (or cannot be split any further): encode it.
		ByteArrayOutputStream os = new ByteArrayOutputStream();
		OutputStream eos; // the encoder
		if (b64) // "B" encoding
			eos = new BEncoderStream(os);
		else
			// "Q" encoding
			eos = new QEncoderStream(os, encodingWord);

		try { // do the encoding
			eos.write(bytes);
			eos.close();
		} catch (IOException ioex) {
			// ignored: the encoder writes into an in-memory buffer
		}

		byte[] encodedBytes = os.toByteArray(); // the encoded stuff

		// Now write out the encoded (all ASCII) bytes into our
		// StringBuffer
		if (!first) { // not the first encoded-word of this sequence
			if (foldEncodedWords)
				buf.append("\r\n "); // start a continuation line
			else
				buf.append(" "); // line will be folded later
		}

		buf.append(prefix);
		for (int i = 0; i < encodedBytes.length; i++)
			buf.append((char) encodedBytes[i]);
		buf.append("?="); // terminate the current sequence
	}

	/**
	 * The string is parsed using the rules in RFC 2047 and RFC 2231 for
	 * parsing an "encoded-word". If the parse fails, a ParseException is
	 * thrown. Otherwise, it is transfer-decoded, and then
	 * charset-converted into Unicode. If the charset-conversion
	 * fails, an UnsupportedEncodingException is thrown.
	 * <p>
	 * 
	 * @param eword
	 *            the encoded value
	 * @exception ParseException
	 *                if the string is not an
	 *                encoded-word as per RFC 2047 and RFC 2231.
	 * @exception UnsupportedEncodingException
	 *                if the charset
	 *                conversion failed.
	 */
	public static String decodeWord(String eword) throws ParseException, UnsupportedEncodingException {
		// An encoded-word has the shape "=?charset?encoding?encoded-text?=".
		if (!eword.startsWith("=?"))
			throw new ParseException("encoded word does not start with \"=?\": " + eword);

		// --- charset: everything between the leading "=?" and the next '?'
		final int charsetStart = 2;
		final int charsetEnd = eword.indexOf('?', charsetStart);
		if (charsetEnd == -1)
			throw new ParseException("encoded word does not include charset: " + eword);
		String charset = eword.substring(charsetStart, charsetEnd);
		// RFC 2231 permits "charset*language"; the language tag is discarded
		final int languageMark = charset.indexOf('*');
		if (languageMark >= 0)
			charset = charset.substring(0, languageMark);
		charset = javaCharset(charset);

		// --- encoding: the token that ends at the following '?'
		final int encodingStart = charsetEnd + 1;
		final int encodingEnd = eword.indexOf('?', encodingStart);
		if (encodingEnd == -1)
			throw new ParseException("encoded word does not include encoding: " + eword);
		final String encoding = eword.substring(encodingStart, encodingEnd);

		// --- encoded text: runs up to the first "?=" terminator
		final int textStart = encodingEnd + 1;
		final int textEnd = eword.indexOf("?=", textStart);
		if (textEnd == -1)
			throw new ParseException("encoded word does not end with \"?=\": " + eword);
		// XXX - in strict mode a terminator that is not at the very end of
		// the string should arguably be rejected here; that check is left
		// out on purpose for compatibility.
		final String word = eword.substring(textStart, textEnd);

		try {
			String decodedWord = "";
			if (word.length() > 0) {
				// Raw ASCII bytes of the encoded text
				ByteArrayInputStream rawBytes = new ByteArrayInputStream(ASCIIUtility.getBytes(word));

				// Pick the transfer decoder named by the encoding token
				InputStream decoder;
				if (encoding.equalsIgnoreCase("B")) {
					decoder = new BASE64DecoderStream(rawBytes);
				} else if (encoding.equalsIgnoreCase("Q")) {
					decoder = new QDecoderStream(rawBytes);
				} else {
					throw new UnsupportedEncodingException("unknown encoding: " + encoding);
				}

				// Neither B nor Q decoding can grow the data, so a buffer
				// as large as the encoded text is always big enough and
				// avoids going through a ByteArrayOutputStream.
				int capacity = rawBytes.available();
				byte[] decoded = new byte[capacity];
				int decodedLength = decoder.read(decoded, 0, capacity);

				// Charset-convert whatever was actually decoded
				if (decodedLength > 0)
					decodedWord = new String(decoded, 0, decodedLength, charset);
			}

			// Anything after the terminator is appended; in non-strict
			// mode it is first scanned for further encoded-words.
			final int tailStart = textEnd + 2;
			if (tailStart < eword.length()) {
				String rest = eword.substring(tailStart);
				if (!decodeStrict)
					rest = decodeInnerWords(rest);
				decodedWord += rest;
			}
			return decodedWord;
		} catch (UnsupportedEncodingException uex) {
			// Must be rethrown explicitly: it is an IOException and would
			// otherwise be converted to a ParseException below.
			throw uex;
		} catch (IOException ioex) {
			// Shouldn't happen.
			throw new ParseException(ioex.toString());
		} catch (IllegalArgumentException iex) {
			// Some unknown charset names (e.g. of the form ISO-XXX-XXX)
			// make the JDK throw IllegalArgumentException instead of the
			// expected UnsupportedEncodingException; normalize that here.
			throw new UnsupportedEncodingException(charset);
		}
	}

	/**
	 * Look for encoded words within a word. The MIME spec doesn't
	 * allow this, but many broken mailers, especially Japanese mailers,
	 * produce such incorrect encodings.
	 */
	private static String decodeInnerWords(String word) throws UnsupportedEncodingException {
		/*
		 * Scans "word" for embedded "=?charset?encoding?text?=" sequences
		 * and replaces each one with the result of decodeWord(). A sequence
		 * that decodeWord() rejects with a ParseException is copied through
		 * unchanged. If nothing was replaced the original string instance is
		 * returned.
		 */
		int start = 0, i;
		StringBuffer buf = new StringBuffer();
		while ((i = word.indexOf("=?", start)) >= 0) {
			// find first '?' after opening '=?' - end of charset
			int end = word.indexOf('?', i + 2);
			if (end < 0)
				break;
			// find next '?' after that - end of encoding
			end = word.indexOf('?', end + 1);
			if (end < 0)
				break;
			// find terminating '?='
			end = word.indexOf("?=", end + 1);
			if (end < 0)
				break;

			// Only now copy the literal text that precedes the encoded-word.
			// Copying it before the terminator was located meant that an
			// incomplete trailing "=?" left this text in the buffer and the
			// tail append below then added it a second time.
			buf.append(word.substring(start, i));

			String s = word.substring(i, end + 2);
			try {
				s = decodeWord(s);
			} catch (ParseException pex) {
				// not a valid encoded-word after all, keep the original text
			}
			buf.append(s);
			start = end + 2;
		}
		if (start == 0)
			return word; // nothing was decoded
		if (start < word.length())
			buf.append(word.substring(start)); // undecoded remainder
		return buf.toString();
	}

	
	// Matches a single encoded-word of the form "=?charset?encoding?text?=".
	//   group 1 - charset:  zero or more characters that are neither whitespace nor '?'
	//   group 2 - encoding: one or more characters that are neither whitespace nor '?'
	//   group 3 - text:     zero or more characters other than '?'
	// Compiled once because decodeText() uses it on every call.
	private static final Pattern PAT_RFC1522 = Pattern.compile(
			"=\\?([\\S&&[^\\?]]*)\\?([\\S&&[^\\?]]+)\\?([^\\?]*)\\?=", Pattern.CASE_INSENSITIVE);
	
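	/**
	 * Decode every RFC 1522 / RFC 2047 encoded-word
	 * (<code>=?charset?encoding?text?=</code>) found in a header value.
	 * Consecutive encoded-words that share the same charset and encoding
	 * may be concatenated before they are decoded. Input longer than 8192
	 * characters is truncated to its first 8192 characters, and NUL
	 * characters are removed from the result.
	 *
	 * @param content
	 *            the raw header text, may be null
	 * @return the decoded text, or null if <code>content</code> is null
	 * @throws UnsupportedEncodingException
	 *             if the charset conversion of an encoded-word fails
	 */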
    public static String decodeText(String content) throws UnsupportedEncodingException {
        /*
         * Finds every "=?charset?encoding?text?=" sequence in content,
         * decodes it and keeps the (trimmed) text found between sequences.
         * Adjacent sequences with identical charset and encoding are joined
         * and decoded together, so a multi-byte character split across two
         * encoded-words still decodes. When no sequence is found at all the
         * untouched input is handed to decodeContent().
         */
        if (content == null) {
            return content;
        }
        if (content.length() > 8192) {
            // only the first 8192 characters are processed
            content = content.substring(0, 8192);
        }
        String originalContent = content;

        // remove folded whitespace (line breaks and the whitespace around them)
        content = content.replaceAll("[\\s]*[\\r\\n]+[\\s]*", "");

        Matcher m = PAT_RFC1522.matcher(content);

        boolean isMatched = false;
        int end = 0; // index just past the previous match

        // pending run of adjacent encoded-words that has not been decoded yet
        String prevCharset = "";
        String prevEncoding = "";
        String prevContent = "";

        StringBuffer decoded = new StringBuffer();

        while (m.find()) {
            isMatched = true;
            String curCharset = m.group(1);
            String curEncoding = m.group(2);
            String curContent = m.group(3);

            // plain text between the previous encoded-word and this one
            String between = content.substring(end, m.start()).trim();
            end = m.end();

            if (between.length() > 0 || !prevCharset.equals(curCharset) || !prevEncoding.equals(curEncoding)) {
                // this word cannot be joined to the pending run: flush the run
                if (prevContent.length() > 0) {
                    decoded.append(decodeRFC1522(prevCharset, prevEncoding, prevContent));
                }
                prevCharset = curCharset;
                prevEncoding = curEncoding;
                prevContent = curContent;
            } else {
                // same charset and encoding with nothing in between: extend the run
                prevContent += curContent;
            }

            // The in-between text belongs BEFORE the current word, so it has
            // to be emitted before the padding flush below. Emitting it
            // afterwards put it behind the decoded word.
            decoded.append(between);

            // A Base64 chunk that ends in '=' padding is complete and cannot
            // be continued by the next word, so decode the run right away.
            // The encoding token is matched case-insensitively ("B" or "b").
            if ("B".equalsIgnoreCase(curEncoding) && existBase64Padding(curContent)) {
                decoded.append(decodeRFC1522(prevCharset, prevEncoding, prevContent));
                prevCharset = "";
                prevEncoding = "";
                prevContent = "";
            }
        }

        String ret;
        if (isMatched) {
            // flush whatever run is still pending, then the trailing text
            if (prevContent.length() > 0) {
                decoded.append(decodeRFC1522(prevCharset, prevEncoding, prevContent));
            }
            decoded.append(content.substring(end));
            ret = decoded.toString();
        } else {
            ret = decodeContent(originalContent);
        }

        ret = ret.replaceAll("\\r\\n+[\\s]+", "");

        return stripNullCharactor(ret);
    }
    
    /**
     * Tell whether an encoded chunk ends with the '=' character used as
     * Base64 padding.
     *
     * @param content
     *            the encoded text to inspect, may be null
     * @return true only when content is non-empty and its last character is '='
     */
    private static boolean existBase64Padding(String content) {
        return content != null && content.endsWith("=");
    }

	/**
	 * Return a copy of the given string with every NUL ('\0') character
	 * removed.
	 *
	 * @param str
	 *            the text to clean, may be null
	 * @return the text without NUL characters, or null if str is null
	 */
	private static String stripNullCharactor(String str) {
		if (str == null)
			return null;

		char[] chars = str.toCharArray();
		StringBuffer kept = new StringBuffer();
		for (int k = 0; k < chars.length; k++) {
			if (chars[k] == '\0')
				continue; // drop it
			kept.append(chars[k]);
		}
		return kept.toString();
	}
    
    /**
     * Rebuild an encoded-word from its three parts and decode it with
     * {@link MimeUtility#decodeWord(String)}.
     *
     * @param charset
     *            the charset token of the encoded-word
     * @param encoding
     *            the encoding token of the encoded-word
     * @param content
     *            the encoded text
     * @return the decoded text, or <code>content</code> unchanged when
     *         decodeWord rejects the rebuilt word with a ParseException
     * @throws UnsupportedEncodingException
     *             propagated from decodeWord
     */
    private static String decodeRFC1522(String charset, String encoding, String content) throws UnsupportedEncodingException {
        StringBuffer encodedWord = new StringBuffer("=?");
        encodedWord.append(charset).append("?").append(encoding).append("?").append(content).append("?=");
        try {
            return MimeUtility.decodeWord(encodedWord.toString());
        } catch (ParseException e) {
            // not decodable as an encoded-word: fall back to the raw text
            return content;
        }
    }
	
	/**
	 * A utility method to quote a word, if the word contains any
	 * characters from the specified 'specials' list.
	 * <p>
	 * 
	 * The <code>HeaderTokenizer</code> class defines two special sets of
	 * delimiters - MIME and RFC 822.
	 * <p>
	 * 
	 * This method is typically used during the generation of RFC 822 and MIME
	 * header fields.
	 * 
	 * @param word
	 *            word to be quoted
	 * @param specials
	 *            the set of special characters
	 * @return the possibly quoted word
	 * @see javax.mail.internet.HeaderTokenizer#MIME
	 * @see javax.mail.internet.HeaderTokenizer#RFC822
	 */
	public static String quote(String word, String specials) {
		final int length = (word == null) ? 0 : word.length();
		if (length == 0)
			return "\"\""; // null and empty both become an empty quoted string

		boolean mustQuote = false;
		for (int idx = 0; idx < length; idx++) {
			final char ch = word.charAt(idx);

			if (ch != '"' && ch != '\\' && ch != '\r' && ch != '\n') {
				// Control characters, DEL and above, and the caller's
				// specials force quoting but need no escaping.
				if (ch < ' ' || ch >= 0x7f || specials.indexOf(ch) >= 0)
					mustQuote = true;
				continue;
			}

			// First character that needs a backslash: from here on the
			// remainder is escaped and the whole word is quoted.
			StringBuffer escaped = new StringBuffer(length + 3);
			escaped.append('"');
			escaped.append(word.substring(0, idx));
			int previous = 0;
			for (int k = idx; k < length; k++) {
				final char cur = word.charAt(k);
				final boolean needsEscape = cur == '"' || cur == '\\' || cur == '\r' || cur == '\n';
				// the LF of a CRLF pair is covered by the escape put before the CR
				final boolean lfAfterCr = cur == '\n' && previous == '\r';
				if (needsEscape && !lfAfterCr)
					escaped.append('\\');
				escaped.append(cur);
				previous = cur;
			}
			escaped.append('"');
			return escaped.toString();
		}

		if (!mustQuote)
			return word;

		StringBuffer quoted = new StringBuffer(length + 2);
		quoted.append('"');
		quoted.append(word);
		quoted.append('"');
		return quoted.toString();
	}

	/**
	 * Fold a string at linear whitespace so that each line is no longer
	 * than 76 characters, if possible. If there are more than 76
	 * non-whitespace characters consecutively, the string is folded at
	 * the first whitespace after that sequence. The parameter <code>used</code>
	 * indicates how many characters have been used in
	 * the current line; it is usually the length of the header name.
	 * <p>
	 * 
	 * Note that line breaks in the string aren't escaped; they probably should
	 * be.
	 * 
	 * @param used
	 *            characters used in line so far
	 * @param s
	 *            the string to fold
	 * @return the folded string
	 * @since JavaMail 1.4
	 */
	public static String fold(int used, String s) {
		if (!foldText)
			return s;

		// Drop trailing blanks, tabs and line breaks
		int keep = s.length();
		while (keep > 0) {
			char tail = s.charAt(keep - 1);
			if (tail != ' ' && tail != '\t' && tail != '\r' && tail != '\n')
				break;
			keep--;
		}
		if (keep != s.length())
			s = s.substring(0, keep);

		// Nothing to do when the trimmed text fits on the current line
		if (used + s.length() <= 76)
			return s;

		StringBuffer folded = new StringBuffer(s.length() + 4);
		char previous = 0;
		while (used + s.length() > 76) {
			// Find the last place to break: the first character of a run
			// of whitespace, as far right as possible within the limit
			// (or the first such place at all if none is within it).
			int breakAt = -1;
			for (int i = 0; i < s.length(); i++) {
				if (breakAt != -1 && used + i > 76)
					break;
				char ch = s.charAt(i);
				boolean isBlank = ch == ' ' || ch == '\t';
				boolean afterBlank = previous == ' ' || previous == '\t';
				if (isBlank && !afterBlank)
					breakAt = i;
				previous = ch;
			}

			if (breakAt == -1) {
				// no whitespace left: emit the remainder unfolded
				folded.append(s);
				return folded.toString();
			}

			// Emit the line, a CRLF, and the whitespace character itself
			// as the first character of the continuation line.
			folded.append(s.substring(0, breakAt));
			folded.append("\r\n");
			previous = s.charAt(breakAt);
			folded.append(previous);
			s = s.substring(breakAt + 1);
			used = 1;
		}
		folded.append(s);
		return folded.toString();
	}

	/**
	 * Unfold a folded header. Any line breaks that aren't escaped and
	 * are followed by whitespace are removed.
	 * 
	 * @param s
	 *            the string to unfold
	 * @return the unfolded string
	 * @since JavaMail 1.4
	 */
	public static String unfold(String s) {
		if (!foldText)
			return s;

		StringBuffer out = null; // stays null when s contains no line break
		int brk;
		while ((brk = indexOfAny(s, "\r\n")) >= 0) {
			if (out == null)
				out = new StringBuffer(s.length());

			final int len = s.length();
			// index just past the line break; CRLF counts as one break
			int next = brk + 1;
			if (next < len && s.charAt(brk) == '\r' && s.charAt(next) == '\n')
				next++;

			final boolean escaped = brk != 0 && s.charAt(brk - 1) == '\\';
			if (escaped) {
				// drop the backslash but keep the line break itself
				out.append(s.substring(0, brk - 1));
				out.append(s.substring(brk, next));
			} else if (next < len && (s.charAt(next) == ' ' || s.charAt(next) == '\t')) {
				// continuation line: swallow the break and all leading
				// whitespace, leaving a single space in their place
				do {
					next++;
				} while (next < len && (s.charAt(next) == ' ' || s.charAt(next) == '\t'));
				if (brk != 0) {
					out.append(s.substring(0, brk));
					out.append(' ');
				}
			} else {
				// not a continuation line, the break is left in
				out.append(s.substring(0, next));
			}
			s = s.substring(next);
		}

		if (out == null)
			return s;
		out.append(s);
		return out.toString();
	}

	/**
	 * Return the first index of any of the characters in "any" in "s",
	 * or -1 if none are found.
	 * 
	 * This should be a method on String.
	 */
	private static int indexOfAny(String s, String any) {
		return indexOfAny(s, any, 0);
	}

	/**
	 * Return the first index at or after "start" in "s" that holds one of
	 * the characters in "any", or -1 if there is none. A negative "start"
	 * also yields -1.
	 */
	private static int indexOfAny(String s, String any, int start) {
		final int len = s.length();
		// A negative start can never address a character; answer -1
		// directly instead of provoking an index exception.
		if (start < 0)
			return -1;

		int pos = start;
		while (pos < len) {
			if (any.indexOf(s.charAt(pos)) >= 0)
				return pos;
			pos++;
		}
		return -1;
	}

	/**
	 * Convert a MIME charset name into a valid Java charset name.
	 * <p>
	 * 
	 * @param charset
	 *            the MIME charset name
	 * @return the Java charset equivalent. If a suitable mapping is
	 *         not available, the passed in charset is itself returned.
	 */
	public static String javaCharset(String charset) {
		// Without a name or a mapping table there is nothing to translate
		if (charset == null || mime2java == null)
			return charset;

		// Table keys are lower-case, so look the name up in lower case
		String javaName = (String) mime2java.get(charset.toLowerCase(Locale.ENGLISH));
		if (javaName != null)
			return javaName;
		return charset;
	}

	/**
	 * Convert a java charset into its MIME charset name.
	 * <p>
	 * 
	 * Note that a future version of JDK (post 1.2) might provide this
	 * functionality, in which case, we may deprecate this method then.
	 * 
	 * @param charset
	 *            the JDK charset
	 * @return the MIME/IANA equivalent. If a mapping
	 *         is not possible, the passed in charset itself
	 *         is returned.
	 * @since JavaMail 1.1
	 */
	public static String mimeCharset(String charset) {
		// Without a name or a mapping table there is nothing to translate
		if (charset == null || java2mime == null)
			return charset;

		// Table keys are lower-case, so look the name up in lower case
		String mimeName = (String) java2mime.get(charset.toLowerCase(Locale.ENGLISH));
		if (mimeName != null)
			return mimeName;
		return charset;
	}

	// Cached result of getDefaultJavaCharset(); null until it is first computed.
	private static String defaultJavaCharset;
	// Cached result of getDefaultMIMECharset(); null until it is first computed.
	private static String defaultMIMECharset;

	/**
	 * Get the default charset corresponding to the system's current
	 * default locale. If the System property <code>mail.mime.charset</code> is
	 * set, a system charset corresponding to this MIME charset will be
	 * returned.
	 * <p>
	 * 
	 * @return the default charset of the system's default locale,
	 *         as a Java charset. (NOT a MIME charset)
	 * @since JavaMail 1.1
	 */
	public static String getDefaultJavaCharset() {
		// Already resolved on an earlier call
		if (defaultJavaCharset != null)
			return defaultJavaCharset;

		// mail.mime.charset, when set, decides the default Java charset too
		String mimecs = null;
		try {
			mimecs = System.getProperty("mail.mime.charset");
		} catch (SecurityException ex) {
			// not allowed to read the property, carry on without it
		}
		if (mimecs != null && mimecs.length() > 0) {
			defaultJavaCharset = javaCharset(mimecs);
			return defaultJavaCharset;
		}

		try {
			defaultJavaCharset = System.getProperty("file.encoding", "8859_1");
		} catch (SecurityException sex) {
			// file.encoding cannot be read either: ask an InputStreamReader
			// built over a dummy stream which encoding it uses.
			InputStream dummy = new InputStream() {
				public int read() {
					return 0;
				}
			};
			InputStreamReader reader = new InputStreamReader(dummy);
			String encoding = reader.getEncoding();
			defaultJavaCharset = (encoding != null) ? encoding : "8859_1";
		}

		return defaultJavaCharset;
	}

	/*
	 * Get the default MIME charset for this locale.
	 */
	static String getDefaultMIMECharset() {
		if (defaultMIMECharset == null) {
			// An explicit mail.mime.charset setting wins
			try {
				defaultMIMECharset = System.getProperty("mail.mime.charset");
			} catch (SecurityException ex) {
				// not allowed to read the property, fall through
			}
			// Otherwise derive it from the default Java charset
			if (defaultMIMECharset == null)
				defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
		}
		return defaultMIMECharset;
	}

	// Tables to map MIME charset names to Java names and vice versa.
	// XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
	// MIME charset name (lower-case key) -> Java charset name; read by javaCharset().
	private static Hashtable mime2java;
	// Java charset name (lower-case key) -> MIME charset name; read by mimeCharset().
	private static Hashtable java2mime;

	static {
		java2mime = new Hashtable(40);
		mime2java = new Hashtable(10);

		try {
			// Use this class's classloader to load the mapping file
			// XXX - we should use SecuritySupport, but it's in another package
			InputStream is = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");

			if (is != null) {
				try {
					is = new LineInputStream(is);

					// Load the JDK-to-MIME charset mapping table
					loadMappings((LineInputStream) is, java2mime);

					// Load the MIME-to-JDK charset mapping table
					loadMappings((LineInputStream) is, mime2java);
				} finally {
					try {
						is.close();
					} catch (Exception cex) {
						// ignore
					}
				}
			}
		} catch (Exception ex) {
		}

		// If we didn't load the tables, e.g., because we didn't have
		// permission, load them manually. The entries here should be
		// the same as the default javamail.charset.map.
		if (java2mime.isEmpty()) {
			java2mime.put("8859_1", "ISO-8859-1");
			java2mime.put("iso8859_1", "ISO-8859-1");
			java2mime.put("iso8859-1", "ISO-8859-1");

			java2mime.put("8859_2", "ISO-8859-2");
			java2mime.put("iso8859_2", "ISO-8859-2");
			java2mime.put("iso8859-2", "ISO-8859-2");

			java2mime.put("8859_3", "ISO-8859-3");
			java2mime.put("iso8859_3", "ISO-8859-3");
			java2mime.put("iso8859-3", "ISO-8859-3");

			java2mime.put("8859_4", "ISO-8859-4");
			java2mime.put("iso8859_4", "ISO-8859-4");
			java2mime.put("iso8859-4", "ISO-8859-4");

			java2mime.put("8859_5", "ISO-8859-5");
			java2mime.put("iso8859_5", "ISO-8859-5");
			java2mime.put("iso8859-5", "ISO-8859-5");

			java2mime.put("8859_6", "ISO-8859-6");
			java2mime.put("iso8859_6", "ISO-8859-6");
			java2mime.put("iso8859-6", "ISO-8859-6");

			java2mime.put("8859_7", "ISO-8859-7");
			java2mime.put("iso8859_7", "ISO-8859-7");
			java2mime.put("iso8859-7", "ISO-8859-7");

			java2mime.put("8859_8", "ISO-8859-8");
			java2mime.put("iso8859_8", "ISO-8859-8");
			java2mime.put("iso8859-8", "ISO-8859-8");

			java2mime.put("8859_9", "ISO-8859-9");
			java2mime.put("iso8859_9", "ISO-8859-9");
			java2mime.put("iso8859-9", "ISO-8859-9");

			java2mime.put("sjis", "Shift_JIS");
			java2mime.put("jis", "ISO-2022-JP");
			java2mime.put("iso2022jp", "ISO-2022-JP");
			java2mime.put("euc_jp", "euc-jp");
			java2mime.put("koi8_r", "koi8-r");
			java2mime.put("euc_cn", "euc-cn");
			java2mime.put("euc_tw", "euc-tw");
			java2mime.put("euc_kr", "euc-kr");
		}
		if (mime2java.isEmpty()) {
			mime2java.put("iso-2022-cn", "ISO2022CN");
			mime2java.put("iso-2022-kr", "ISO2022KR");
			mime2java.put("utf-8", "UTF8");
			mime2java.put("utf8", "UTF8");
			mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
			mime2java.put("ja_jp.eucjp", "EUCJIS");
			mime2java.put("euc-kr", "KSC5601");
			mime2java.put("euckr", "KSC5601");
			mime2java.put("us-ascii", "ISO-8859-1");
			mime2java.put("x-us-ascii", "ISO-8859-1");
		}
	}

	/**
	 * Read one section of the charset mapping file into the given table.
	 * Reading stops at end of input, on a read error, or at a line that
	 * both starts and ends with "--". Blank lines and lines starting with
	 * '#' are skipped, as are lines with fewer than two tokens.
	 *
	 * @param is
	 *            the stream positioned at the start of the section
	 * @param table
	 *            receives lower-cased key -> value for each entry
	 */
	private static void loadMappings(LineInputStream is, Hashtable table) {
		for (;;) {
			String line;
			try {
				line = is.readLine();
			} catch (IOException ioex) {
				return; // read error ends the section
			}

			if (line == null)
				return; // end of file
			if (line.startsWith("--") && line.endsWith("--"))
				return; // section separator

			// blank lines and comments carry no mapping
			if (line.trim().length() == 0 || line.startsWith("#"))
				continue;

			// An entry is <key><separator><value> with SPACE or HT as separator
			StringTokenizer fields = new StringTokenizer(line, " \t");
			if (!fields.hasMoreTokens())
				continue;
			String key = fields.nextToken();
			if (!fields.hasMoreTokens())
				continue; // key without a value, skipped
			String value = fields.nextToken();
			table.put(key.toLowerCase(Locale.ENGLISH), value);
		}
	}

	// Result codes returned by the checkAscii methods and AsciiOutputStream.getAscii().
	// The data contained no non-US-ASCII characters.
	static final int ALL_ASCII = 1;
	// US-ASCII characters outnumber the others (the stream-based checks
	// also return this for otherwise clean data with an overlong line).
	static final int MOSTLY_ASCII = 2;
	// Everything else.
	static final int MOSTLY_NONASCII = 3;

	/**
	 * Check if the given string contains non US-ASCII characters.
	 * 
	 * @param s
	 *            string
	 * @return ALL_ASCII if all characters in the string
	 *         belong to the US-ASCII charset. MOSTLY_ASCII
	 *         if more than half of the available characters
	 *         are US-ASCII characters. Else MOSTLY_NONASCII.
	 */
	static int checkAscii(String s) {
		final int total = s.length();

		// Count the characters that nonascii() rejects; the rest are ASCII
		int nonAsciiCount = 0;
		for (int k = 0; k < total; k++) {
			if (nonascii((int) s.charAt(k)))
				nonAsciiCount++;
		}

		if (nonAsciiCount == 0)
			return ALL_ASCII;

		final int asciiCount = total - nonAsciiCount;
		return (asciiCount > nonAsciiCount) ? MOSTLY_ASCII : MOSTLY_NONASCII;
	}

	/**
	 * Check if the given byte array contains non US-ASCII characters.
	 * 
	 * @param b
	 *            byte array
	 * @return ALL_ASCII if all characters in the string
	 *         belong to the US-ASCII charset. MOSTLY_ASCII
	 *         if more than half of the available characters
	 *         are US-ASCII characters. Else MOSTLY_NONASCII.
	 * 
	 *         XXX - this method is no longer used
	 */
	static int checkAscii(byte[] b) {
		final int total = b.length;

		int nonAsciiCount = 0;
		for (int k = 0; k < total; k++) {
			// Masking with 0xff promotes the byte to a non-negative int
			final int unsigned = b[k] & 0xff;
			if (nonascii(unsigned))
				nonAsciiCount++;
		}

		if (nonAsciiCount == 0)
			return ALL_ASCII;

		final int asciiCount = total - nonAsciiCount;
		return (asciiCount > nonAsciiCount) ? MOSTLY_ASCII : MOSTLY_NONASCII;
	}

	/**
	 * Check if the given input stream contains non US-ASCII characters.
	 * Up to <code>max</code> bytes are checked. If <code>max</code> is
	 * set to <code>ALL</code>, then all the bytes available in this
	 * input stream are checked. If <code>breakOnNonAscii</code> is true
	 * the check terminates when the first non-US-ASCII character is
	 * found and MOSTLY_NONASCII is returned. Else, the check continues
	 * till <code>max</code> bytes or till the end of stream.
	 * 
	 * @param is
	 *            the input stream
	 * @param max
	 *            maximum bytes to check for. The special value
	 *            ALL indicates that all the bytes in this input
	 *            stream must be checked.
	 * @param breakOnNonAscii
	 *            if <code>true</code>, then terminate
	 *            the check when the first non-US-ASCII character
	 *            is found.
	 * @return ALL_ASCII if all characters in the string
	 *         belong to the US-ASCII charset. MOSTLY_ASCII
	 *         if more than half of the available characters
	 *         are US-ASCII characters. Else MOSTLY_NONASCII.
	 */
	static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
		int ascii = 0, non_ascii = 0;
		int len;
		int block = 4096;
		int linelen = 0;
		boolean longLine = false, badEOL = false;
		// EOL checking is only done together with break-on-non-ASCII
		boolean checkEOL = encodeEolStrict && breakOnNonAscii;
		byte buf[] = null;
		if (max != 0) {
			block = (max == ALL) ? 4096 : Math.min(max, 4096);
			buf = new byte[block];
		}
		// The previous byte is remembered ACROSS reads, so that a CR ending
		// one read followed by an LF starting the next one is still seen
		// as a proper CRLF rather than as a bad line ending.
		int lastb = 0;
		while (max != 0) {
			try {
				// Never ask for more than what is left of the budget;
				// otherwise max could drop below zero and never reach 0.
				int want = (max == ALL) ? block : Math.min(block, max);
				if ((len = is.read(buf, 0, want)) == -1)
					break;
				for (int i = 0; i < len; i++) {
					// Masking with 0xff promotes the byte to an int and
					// keeps the value from being negative.
					int b = buf[i] & 0xff;
					// CR not followed by LF, or LF not preceded by CR
					if (checkEOL && ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
						badEOL = true;
					if (b == '\r' || b == '\n')
						linelen = 0;
					else {
						linelen++;
						if (linelen > 998) // 1000 - CRLF
							longLine = true;
					}
					if (nonascii(b)) { // non-ascii
						if (breakOnNonAscii) // we are done
							return MOSTLY_NONASCII;
						else
							non_ascii++;
					} else
						ascii++;
					lastb = b;
				}
			} catch (IOException ioex) {
				break; // treat a read error like end of stream
			}
			if (max != ALL)
				max -= len;
		}

		if (max == 0 && breakOnNonAscii)
			// We were told to stop at the first non-ASCII character and
			// found none, but the byte budget ran out before the end of
			// the stream. The rest of the data is unknown, so play safe
			// and report MOSTLY_NONASCII instead of ALL_ASCII.
			return MOSTLY_NONASCII;

		if (non_ascii == 0) { // no non-us-ascii characters so far
			// For non-text data a CR without LF (or vice versa) is
			// reported as mostly non-ASCII so that the data gets base64
			// encoded; the quoted-printable encoder doesn't encode this
			// case properly.
			if (badEOL)
				return MOSTLY_NONASCII;
			// a line that is too long degrades the result to mostly ascii
			else if (longLine)
				return MOSTLY_ASCII;
			else
				return ALL_ASCII;
		}
		if (ascii > non_ascii) // mostly ascii
			return MOSTLY_ASCII;
		return MOSTLY_NONASCII;
	}

	/**
	 * Tell whether a character or byte value counts as non-ASCII for the
	 * checkAscii methods: DEL (0x7f) and everything above it, plus every
	 * value below space (0x20) other than CR, LF and TAB.
	 */
	static final boolean nonascii(int b) {
		if (b >= 0x7f)
			return true; // DEL and beyond
		if (b >= 0x20)
			return false; // printable ASCII
		// below space: only CR, LF and TAB are acceptable
		return b != '\r' && b != '\n' && b != '\t';
	}
}

/**
 * An OutputStream that determines whether the data written to
 * it is all ASCII, mostly ASCII, or mostly non-ASCII.
 */
class AsciiOutputStream extends OutputStream {
	// Stop (by throwing EOFException) at the first non-ASCII byte
	private final boolean stopAtFirstNonAscii;
	// Watch for CR without LF and LF without CR
	private final boolean strictEol;

	private int asciiCount;
	private int nonAsciiCount;
	// Bytes seen since the last CR or LF
	private int currentLineLength;
	private boolean sawLongLine;
	private boolean sawBadEol;
	// Last byte that went through check() without it throwing
	private int previousByte;
	// Non-zero once the result has been decided early
	private int verdict;

	public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
		this.stopAtFirstNonAscii = breakOnNonAscii;
		this.strictEol = encodeEolStrict && breakOnNonAscii;
	}

	public void write(int b) throws IOException {
		check(b);
	}

	public void write(byte b[]) throws IOException {
		write(b, 0, b.length);
	}

	public void write(byte b[], int off, int len) throws IOException {
		final int limit = off + len;
		for (int k = off; k < limit; k++)
			check(b[k]);
	}

	/**
	 * Classify a single byte and update the running statistics.
	 *
	 * @exception EOFException
	 *                when a non-ASCII byte is seen and this stream was
	 *                created with breakOnNonAscii set
	 */
	private final void check(int b) throws IOException {
		final int value = b & 0xff;

		if (strictEol) {
			// a CR must be followed by LF, and an LF must follow a CR
			final boolean afterCr = previousByte == '\r';
			final boolean isLf = value == '\n';
			if (afterCr != isLf)
				sawBadEol = true;
		}

		if (value == '\r' || value == '\n') {
			currentLineLength = 0;
		} else if (++currentLineLength > 998) { // 1000 - CRLF
			sawLongLine = true;
		}

		if (MimeUtility.nonascii(value)) {
			nonAsciiCount++;
			if (stopAtFirstNonAscii) {
				// the answer is known, abort the write
				verdict = MimeUtility.MOSTLY_NONASCII;
				throw new EOFException();
			}
		} else {
			asciiCount++;
		}
		previousByte = value;
	}

	/**
	 * Return ASCII-ness of data stream.
	 */
	public int getAscii() {
		if (verdict != 0)
			return verdict;

		// For non-text data a CR without LF (or vice versa) is reported
		// as mostly non-ASCII so that the data gets base64 encoded; the
		// quoted-printable encoder doesn't encode this case properly.
		if (sawBadEol)
			return MimeUtility.MOSTLY_NONASCII;

		if (nonAsciiCount == 0) {
			// clean so far, but an overlong line degrades the result
			return sawLongLine ? MimeUtility.MOSTLY_ASCII : MimeUtility.ALL_ASCII;
		}

		return (asciiCount > nonAsciiCount) ? MimeUtility.MOSTLY_ASCII : MimeUtility.MOSTLY_NONASCII;
	}
}
